# Import the necessary libraries
library(readr)
library(dplyr)
library(tidyr)
library(tseries)
library(vars)
library(ggplot2)
library(plotly)
library(stargazer)
library(gridExtra)
library(forecast)
library(lmtest)
# Import the data (the CSV is expected in the current working directory)
data <- read_csv('Sample Media Spend Data.csv')
head(data)
## # A tibble: 6 × 10
## Division Calen…¹ Paid_…² Organ…³ Googl…⁴ Email…⁵ Faceb…⁶ Affil…⁷ Overa…⁸ Sales
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A 1/6/20… 392 422 408 349895. 73580 12072 682 59417
## 2 A 1/13/2… 787 904 110 506270. 11804 9499 853 56806
## 3 A 1/20/2… 81 970 742 430042. 52232 17048 759 48715
## 4 A 1/27/2… 25 575 65 417746. 78640 10207 942 72047
## 5 A 2/3/20… 565 284 295 408506. 40561 5834 658 56235
## 6 A 2/10/2… 256 330 683 434730. 36750 8469 691 56347
## # … with abbreviated variable names ¹Calendar_Week, ²Paid_Views,
## # ³Organic_Views, ⁴Google_Impressions, ⁵Email_Impressions,
## # ⁶Facebook_Impressions, ⁷Affiliate_Impressions, ⁸Overall_Views
dim(data)
## [1] 3051 10
summary(data)
## Division Calendar_Week Paid_Views Organic_Views
## Length:3051 Length:3051 Min. : 1 Min. : 1.0
## Class :character Class :character 1st Qu.: 537 1st Qu.: 712.5
## Mode :character Mode :character Median : 2699 Median : 4110.0
## Mean : 15094 Mean : 13355.7
## 3rd Qu.: 17358 3rd Qu.: 16230.5
## Max. :518190 Max. :270453.0
## Google_Impressions Email_Impressions Facebook_Impressions
## Min. : 7 Min. : 40894 Min. : 29
## 1st Qu.: 169828 1st Qu.: 378497 1st Qu.: 57074
## Median : 490531 Median : 590971 Median : 127523
## Mean : 886174 Mean : 760509 Mean : 269127
## 3rd Qu.: 1022622 3rd Qu.: 962247 3rd Qu.: 283505
## Max. :17150439 Max. :7317730 Max. :7558435
## Affiliate_Impressions Overall_Views Sales
## Min. : 910 Min. : 2 Min. : 15436
## 1st Qu.: 9127 1st Qu.: 747 1st Qu.: 73394
## Median : 16658 Median : 7879 Median : 113573
## Mean : 22911 Mean : 27981 Mean : 185901
## 3rd Qu.: 27486 3rd Qu.: 34112 3rd Qu.: 202976
## Max. :175791 Max. :635057 Max. :3575430
We want to check for any missing or duplicate data.
# Check for missing data (total count of NA cells across the whole table)
sum(is.na(data))
## [1] 0
# Check for duplicate data (fully identical rows)
dup <- duplicated(data)
if (any(dup)) print("Duplicate data found.") else print("No duplicate data found.")
## [1] "No duplicate data found."
We check the number of unique inputs in the Division column and how many rows correspond to each input.
table(data$Division)
##
## A B C D E F G H I J K L M N O P Q R S T
## 113 113 113 113 113 113 113 113 113 113 113 113 113 113 113 113 113 113 113 113
## U V W X Y Z
## 113 113 113 113 113 226
There are 26 divisions/regions/states, with an identical amount of data for the first 25 regions, but more data for division Z. The dataset is only over 113 weeks. Thus, we have a closer look at the data for division Z.
# Create a subset of the data with only division Z
data_Z <- data[data$Division == 'Z', ]
dim(data_Z)
## [1] 226 10
data_Z
## # A tibble: 226 × 10
## Division Calendar_W…¹ Paid_…² Organ…³ Googl…⁴ Email…⁵ Faceb…⁶ Affil…⁷ Overa…⁸
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Z 1/6/2018 165 346 440 9.39e5 156226 43811 223
## 2 Z 1/13/2018 101 571 347 1.36e6 24364 37350 611
## 3 Z 1/20/2018 873 128 716 1.15e6 81574 57746 344
## 4 Z 1/27/2018 232 149 64 1.12e6 139465 34921 199
## 5 Z 2/3/2018 773 509 233 1.10e6 80376 27728 786
## 6 Z 2/10/2018 660 3 379 1.17e6 79818 35288 553
## 7 Z 2/17/2018 225 508 639 1.70e6 185419 33811 699
## 8 Z 2/24/2018 123 316 439 1.49e6 139 26692 425
## 9 Z 3/3/2018 390 725 700 1.14e6 28194 28972 938
## 10 Z 3/10/2018 333 393 614 1.27e6 120648 31188 386
## # … with 216 more rows, 1 more variable: Sales <dbl>, and abbreviated variable
## # names ¹Calendar_Week, ²Paid_Views, ³Organic_Views, ⁴Google_Impressions,
## # ⁵Email_Impressions, ⁶Facebook_Impressions, ⁷Affiliate_Impressions,
## # ⁸Overall_Views
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
It seems division Z has 2 entries for each date but with different values in each column. We thus choose to focus on another division for clarity and ease of use, considering not much information is provided about the dataset.
# Create a subset of the data with only division A
data_A <- data[data$Division == 'A', ]
dim(data_A)
## [1] 113 10
data_A
## # A tibble: 113 × 10
## Divis…¹ Calen…² Paid_…³ Organ…⁴ Googl…⁵ Email…⁶ Faceb…⁷ Affil…⁸ Overa…⁹ Sales
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A 1/6/20… 392 422 408 349895. 73580 12072 682 59417
## 2 A 1/13/2… 787 904 110 506270. 11804 9499 853 56806
## 3 A 1/20/2… 81 970 742 430042. 52232 17048 759 48715
## 4 A 1/27/2… 25 575 65 417746. 78640 10207 942 72047
## 5 A 2/3/20… 565 284 295 408506. 40561 5834 658 56235
## 6 A 2/10/2… 256 330 683 434730. 36750 8469 691 56347
## 7 A 2/17/2… 886 56 664 634433. 112489 8331 685 81604
## 8 A 2/24/2… 336 99 470 555036. 218 6319 569 80492
## 9 A 3/3/20… 305 209 501 423690. 13065 7898 772 61804
## 10 A 3/10/2… 955 283 609 471730. 84449 8428 833 64944
## # … with 103 more rows, and abbreviated variable names ¹Division,
## # ²Calendar_Week, ³Paid_Views, ⁴Organic_Views, ⁵Google_Impressions,
## # ⁶Email_Impressions, ⁷Facebook_Impressions, ⁸Affiliate_Impressions,
## # ⁹Overall_Views
## # ℹ Use `print(n = ...)` to see more rows
To gain in ease of analysis, we further transform the data and replace the Calendar_Week column with a number from 1 to 113, corresponding to the weeks recorded. Thus, there are 113 weeks of data, corresponding to 2 years and 9 weeks of data. We also remove the Division and Overall Views columns as their presence is unnecessary. Indeed, the Overall Views column represents the total number of views that a campaign received across all channels. Here however, we want to analyze the relationships and interactions between the different marketing channels. Therefore, it is appropriate to exclude it from our dataset.
# Add a sequential Week index, drop the unneeded columns, and move Week first
data_A <- data_A %>%
  mutate(Week = row_number()) %>%
  select(-Calendar_Week, -Division, -Overall_Views) %>%
  select(Week, everything())
data_A
## # A tibble: 113 × 8
## Week Paid_Views Organic_Views Google_Impress…¹ Email…² Faceb…³ Affil…⁴ Sales
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 392 422 408 349895. 73580 12072 59417
## 2 2 787 904 110 506270. 11804 9499 56806
## 3 3 81 970 742 430042. 52232 17048 48715
## 4 4 25 575 65 417746. 78640 10207 72047
## 5 5 565 284 295 408506. 40561 5834 56235
## 6 6 256 330 683 434730. 36750 8469 56347
## 7 7 886 56 664 634433. 112489 8331 81604
## 8 8 336 99 470 555036. 218 6319 80492
## 9 9 305 209 501 423690. 13065 7898 61804
## 10 10 955 283 609 471730. 84449 8428 64944
## # … with 103 more rows, and abbreviated variable names ¹Google_Impressions,
## # ²Email_Impressions, ³Facebook_Impressions, ⁴Affiliate_Impressions
## # ℹ Use `print(n = ...)` to see more rows
## Plot the Sales data as a weekly time series
p1 <- ggplot(data_A, aes(x = Week, y = Sales)) +
geom_line() +
labs(title = "Sales over Time", x = "Week", y = "Sales")
ggplotly(p1) # interactive version of the static ggplot
# Inspect the data patterns for the marketing channels where the spend was made
## Create a new data frame with only the columns of interest
df <- data_A[, c("Week", "Paid_Views", "Organic_Views", "Google_Impressions",
"Email_Impressions", "Facebook_Impressions", "Affiliate_Impressions")]
## Reshape to long format; pivot_longer() supersedes the retired gather()
df_long <- pivot_longer(df, cols = -Week, names_to = "Channel", values_to = "Value")
## Plot one line per channel over time
p2 <- ggplot(df_long, aes(x = Week, y = Value, color = Channel)) +
geom_line() +
labs(x = "Week", y = "Value", title = "Marketing Channel Patterns Over Time")
ggplotly(p2)
From the above output, it seems that the peaks in sales coincide more with a rise in Google, and Facebook Impression. Nevertheless, conducting the VAR model will help determine this with greater precision.
We start by taking the log of each variable to stabilise the variance of each variable over time. First, however, let us check whether there are negative or null values.
# Verify every value is strictly positive before log-transforming
nonpositive_found <- any(data_A <= 0)
if (nonpositive_found) {
  print("The dataset contains negative or 0 values.")
} else {
  print("The dataset does not contain negative or 0 values.")
}
## [1] "The dataset does not contain negative or 0 values."
The absence of negative or null values lets us directly apply log-transformation without adding one prior to doing so. Indeed, if there were null or negative values, it would be necessary to add 1 to avoid undefined outputs (and so, an error).
# Log-transformation: add an L-prefixed log column for each series, in the
# same order as before (Sales first, then the six marketing channels)
data_A <- data_A %>%
  mutate(across(
    c(Sales, Paid_Views, Organic_Views, Google_Impressions,
      Email_Impressions, Facebook_Impressions, Affiliate_Impressions),
    log,
    .names = "L{.col}"
  ))
Once the log-transformation is done, we check the seasonal decomposition of the data. This will allow to determine whether we need to apply first-order or seasonal differencing to further stationarise the data.
Sales
LSales <- ts(data_A$LSales, frequency = 52) # weekly data; frequency 52 defines a yearly seasonal cycle
# Plot of time-series
LSales.plot1 <- autoplot(LSales) + ggtitle('Log of Sales Over Time')
# Plot of seasonal decomposition; s.window = "period" holds the seasonal pattern fixed across years
LSales.plot2 <- LSales %>% stl(s.window = "period") %>% autoplot
grid.arrange(LSales.plot1, LSales.plot2, ncol = 2)
There seems to be a clear trend, although the longer grey bar may suggest it is not as significant. There does however seem to be a seasonal component, bringing the necessity to apply a seasonal difference. We can check whether our analysis is correct by conducting several stationarity tests.
# Stationarity tests
adf.test(LSales) # Augmented Dickey-Fuller Test
##
## Augmented Dickey-Fuller Test
##
## data: LSales
## Dickey-Fuller = -2.9919, Lag order = 4, p-value = 0.1649
## alternative hypothesis: stationary
pp.test(LSales) # Phillips-Perron Unit Root Test
##
## Phillips-Perron Unit Root Test
##
## data: LSales
## Dickey-Fuller Z(alpha) = -27.116, Truncation lag parameter = 4, p-value
## = 0.01143
## alternative hypothesis: stationary
kpss.test(LSales) # KPSS Test for Level Stationarity
##
## KPSS Test for Level Stationarity
##
## data: LSales
## KPSS Level = 0.60744, Truncation lag parameter = 4, p-value = 0.02196
# Seasonal stationarity
nsdiffs(LSales)
## [1] 1
Two out of the three tests say that the data is not stationary; consistent with our analysis of the data, we choose to apply first-order differencing. The nsdiffs function gives an output of 1, indicating that it is necessary to apply seasonal differencing as well.
# Apply differencing (first-order removes the trend; seasonal removes the yearly pattern)
LSales.diff <- diff(LSales, differences = 1) # First-order differencing
LSales.diff <- diff(LSales.diff, differences = 1, lag = 52) # Seasonal differencing for a yearly pattern with weekly data
# Check data is differenced (0 means no further differencing is suggested)
ndiffs(LSales.diff)
## [1] 0
nsdiffs(LSales.diff)
## [1] 0
Due to the output of 0, we know the data is now stationary.
LSales
## Time Series:
## Start = c(1, 1)
## End = c(3, 9)
## Frequency = 52
## [1] 10.99234 10.94740 10.79374 11.18507 10.93729 10.93928 11.30963 11.29591
## [9] 11.03172 11.08128 11.05002 11.08242 11.13418 10.88693 10.99122 11.19337
## [17] 11.05049 11.20422 11.19932 11.05757 11.09635 11.12476 11.18880 11.11100
## [25] 11.13932 11.41223 10.77411 11.61149 11.61962 11.14587 11.20297 10.99289
## [33] 11.35328 11.47564 11.37651 11.19140 11.54014 11.46093 11.50458 11.66837
## [41] 11.57038 11.76128 11.62978 11.96139 12.13820 12.24757 12.73172 12.49238
## [49] 12.14090 12.15734 11.60775 11.10417 11.14923 10.92414 11.31978 11.16785
## [57] 11.13017 11.06822 11.54827 11.30288 11.21976 10.99869 11.37586 11.32521
## [65] 11.11336 11.28625 10.92013 11.23826 11.41833 11.29576 11.25314 11.09492
## [73] 11.37838 11.17191 11.02564 11.52008 11.32904 11.15662 11.06756 11.48748
## [81] 11.30017 11.12707 11.09351 10.85478 11.39041 11.64852 11.40607 11.51172
## [89] 11.59583 11.35299 11.25152 12.00685 11.82162 11.87926 11.81847 11.68239
## [97] 12.27802 12.28809 12.23593 12.86914 12.79052 12.52809 12.13691 11.78108
## [105] 11.76967 11.41841 11.56976 11.24181 11.19100 11.25949 12.03274 11.69189
## [113] 11.50647
Paid Views
LPaid_Views <- ts(data_A$LPaid_Views, frequency = 52)
# Plot of time-series
LPaid_Views.plot1 <- autoplot(LPaid_Views) + ggtitle('Log of Paid Views Over Time')
# Plot of seasonal decomposition
LPaid_Views.plot2 <- LPaid_Views %>% stl(s.window = "period") %>% autoplot
grid.arrange(LPaid_Views.plot1, LPaid_Views.plot2, ncol = 2)
There doesn’t seem to be a significant seasonal component although there may be a trend component (shown by the shorter grey bar), which we can check with stationarity and seasonal stationarity tests.
# Stationarity tests
adf.test(LPaid_Views) # Augmented Dickey-Fuller Test
##
## Augmented Dickey-Fuller Test
##
## data: LPaid_Views
## Dickey-Fuller = -2.7254, Lag order = 4, p-value = 0.2756
## alternative hypothesis: stationary
pp.test(LPaid_Views) # Phillips-Perron Unit Root Test
##
## Phillips-Perron Unit Root Test
##
## data: LPaid_Views
## Dickey-Fuller Z(alpha) = -42.401, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(LPaid_Views) # KPSS Test for Level Stationarity
##
## KPSS Test for Level Stationarity
##
## data: LPaid_Views
## KPSS Level = 1.8655, Truncation lag parameter = 4, p-value = 0.01
# Seasonal stationarity
nsdiffs(LPaid_Views)
## [1] 0
Both the ADF and KPSS tests suggest the data is not stationary, whereas the PP test suggests the data is stationary. Taking a majority rule, we apply first-order differencing. Furthermore, the nsdiffs function suggests no seasonal differencing is necessary.
# Apply first-order differencing
LPaid_Views.diff <- diff(LPaid_Views, differences = 1)
# Check data is differenced
ndiffs(LPaid_Views.diff)
## [1] 0
nsdiffs(LPaid_Views.diff)
## [1] 0
Due to the output of 0, we know the data is now stationary.
Organic_Views
LOrganic_Views <- ts(data_A$LOrganic_Views, frequency = 52)
# Plot of time-series
LOrganic_Views.plot1 <- autoplot(LOrganic_Views) + ggtitle('Log of Organic Views Over \nTime')
# Plot of seasonal decomposition
LOrganic_Views.plot2 <- LOrganic_Views %>% stl(s.window = "period") %>% autoplot
grid.arrange(LOrganic_Views.plot1, LOrganic_Views.plot2, ncol = 2)
There doesn’t seem to be a significant seasonal component. However, there may be a trend component, demonstrated by the shorter grey bar on the right side of the ‘trend’ panel. Therefore, we may need to apply first order differencing, which we can check with stationarity and seasonal stationarity tests.
# Stationarity tests
adf.test(LOrganic_Views) # Augmented Dickey-Fuller Test
##
## Augmented Dickey-Fuller Test
##
## data: LOrganic_Views
## Dickey-Fuller = -3.3081, Lag order = 4, p-value = 0.07338
## alternative hypothesis: stationary
pp.test(LOrganic_Views) # Phillips-Perron Unit Root Test
##
## Phillips-Perron Unit Root Test
##
## data: LOrganic_Views
## Dickey-Fuller Z(alpha) = -62.955, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(LOrganic_Views) # KPSS Test for Level Stationarity
##
## KPSS Test for Level Stationarity
##
## data: LOrganic_Views
## KPSS Level = 2.1264, Truncation lag parameter = 4, p-value = 0.01
# Seasonal stationarity
nsdiffs(LOrganic_Views)
## [1] 0
With p-values larger than 0.05 for the ADF test and smaller than 0.05 for the PP and KPSS tests, the LOrganic_Views data is most likely not stationary. It is however stationary in terms of seasonality. We thus need to apply first-order differencing, but not seasonal differencing, to the LOrganic_Views data.
# Apply first-order differencing
LOrganic_Views.diff <- diff(LOrganic_Views, differences = 1)
# Check data is differenced
ndiffs(LOrganic_Views.diff)
## [1] 0
nsdiffs(LOrganic_Views.diff)
## [1] 0
Due to the output of 0, we know the data is now stationary.
Google_Impressions
LGoogle_Impressions <- ts(data_A$LGoogle_Impressions, frequency = 52)
# Plot of time-series
LGoogle_Impressions.plot1 <- autoplot(LGoogle_Impressions) + ggtitle('Log of Google Impressions \nOver Time')
# Plot of seasonal decomposition
LGoogle_Impressions.plot2 <- LGoogle_Impressions %>% stl(s.window = "period") %>% autoplot
grid.arrange(LGoogle_Impressions.plot1, LGoogle_Impressions.plot2, ncol = 2)
There seems to be a trend component, but no seasonal component. Therefore, we may need to apply first-order differencing.
# Stationarity tests
adf.test(LGoogle_Impressions) # Augmented Dickey-Fuller Test
##
## Augmented Dickey-Fuller Test
##
## data: LGoogle_Impressions
## Dickey-Fuller = -3.0443, Lag order = 4, p-value = 0.1432
## alternative hypothesis: stationary
pp.test(LGoogle_Impressions) # Phillips-Perron Unit Root Test
##
## Phillips-Perron Unit Root Test
##
## data: LGoogle_Impressions
## Dickey-Fuller Z(alpha) = -7.8075, Truncation lag parameter = 4, p-value
## = 0.6642
## alternative hypothesis: stationary
kpss.test(LGoogle_Impressions) # KPSS Test for Level Stationarity
##
## KPSS Test for Level Stationarity
##
## data: LGoogle_Impressions
## KPSS Level = 0.99106, Truncation lag parameter = 4, p-value = 0.01
# Seasonal stationarity
nsdiffs(LGoogle_Impressions)
## [1] 0
The ADF (p-value = 0.1432 > 0.05), PP (p-value = 0.6642 > 0.05), and KPSS (p-value = 0.01 < 0.05) tests all suggest the data is not stationary. Therefore, with our prior analysis, we apply first-order differencing. It is unnecessary to apply seasonal differencing.
LGoogle_Impressions.diff <- diff(LGoogle_Impressions, differences = 1)
# Check data is differenced
ndiffs(LGoogle_Impressions.diff)
## [1] 0
nsdiffs(LGoogle_Impressions.diff)
## [1] 0
Due to the output of 0, we know the data is now stationary.
Email_Impressions
LEmail_Impressions <- ts(data_A$LEmail_Impressions, frequency = 52)
# Plot of time-series
LEmail_Impressions.plot1 <- autoplot(LEmail_Impressions) + ggtitle('Log of Email Impressions \nOver Time')
# Plot of seasonal decomposition
LEmail_Impressions.plot2 <- LEmail_Impressions %>% stl(s.window = "period") %>% autoplot
grid.arrange(LEmail_Impressions.plot1, LEmail_Impressions.plot2, ncol = 2)
There doesn’t appear to be a trend or a seasonal component in the data.
# Stationarity tests
adf.test(LEmail_Impressions) # Augmented Dickey-Fuller Test
##
## Augmented Dickey-Fuller Test
##
## data: LEmail_Impressions
## Dickey-Fuller = -3.4772, Lag order = 4, p-value = 0.04741
## alternative hypothesis: stationary
pp.test(LEmail_Impressions) # Phillips-Perron Unit Root Test
##
## Phillips-Perron Unit Root Test
##
## data: LEmail_Impressions
## Dickey-Fuller Z(alpha) = -70.972, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(LEmail_Impressions) # KPSS Test for Level Stationarity
##
## KPSS Test for Level Stationarity
##
## data: LEmail_Impressions
## KPSS Level = 0.26251, Truncation lag parameter = 4, p-value = 0.1
# Seasonal stationarity
nsdiffs(LEmail_Impressions)
## [1] 0
All tests suggest the data is stationary, thus we do not conduct first-order differencing. Again, we do not need to apply seasonal differencing.
Facebook_Impressions
LFacebook_Impressions <- ts(data_A$LFacebook_Impressions, frequency = 52)
# Plot of time-series
LFacebook_Impressions.plot1 <- autoplot(LFacebook_Impressions) + ggtitle('Log of Facebook \nImpressions Over Time')
# Plot of seasonal decomposition
LFacebook_Impressions.plot2 <- LFacebook_Impressions %>% stl(s.window = "period") %>% autoplot
grid.arrange(LFacebook_Impressions.plot1, LFacebook_Impressions.plot2, ncol = 2)
There is a clear trend in the data although this may not be significant due to the long grey bar. We check this below with stationarity tests.
# Stationarity tests
adf.test(LFacebook_Impressions) # Augmented Dickey-Fuller Test
##
## Augmented Dickey-Fuller Test
##
## data: LFacebook_Impressions
## Dickey-Fuller = -3.5554, Lag order = 4, p-value = 0.0404
## alternative hypothesis: stationary
pp.test(LFacebook_Impressions) # Phillips-Perron Unit Root Test
##
## Phillips-Perron Unit Root Test
##
## data: LFacebook_Impressions
## Dickey-Fuller Z(alpha) = -72.907, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(LFacebook_Impressions) # KPSS Test for Level Stationarity
##
## KPSS Test for Level Stationarity
##
## data: LFacebook_Impressions
## KPSS Level = 1.1317, Truncation lag parameter = 4, p-value = 0.01
# Seasonal stationarity
nsdiffs(LFacebook_Impressions)
## [1] 0
Both the ADF and PP tests suggest the data is stationary, unlike the KPSS test. Although a strict majority rule would favour no differencing, the KPSS result together with the trend identified in our prior visual analysis leads us to apply first-order differencing.
# Apply first-order differencing
LFacebook_Impressions.diff <- diff(LFacebook_Impressions, differences = 1)
# Check data is differenced
ndiffs(LFacebook_Impressions.diff)
## [1] 0
nsdiffs(LFacebook_Impressions.diff)
## [1] 0
Due to the output of 0, we know the data is now stationary.
Affiliate_Impressions
LAffiliate_Impressions <- ts(data_A$LAffiliate_Impressions, frequency = 52)
# Plot of time-series
LAffiliate_Impressions.plot1 <- autoplot(LAffiliate_Impressions) + ggtitle('Log of Affiliate Impressions \nOver Time')
# Plot of seasonal decomposition
LAffiliate_Impressions.plot2 <- LAffiliate_Impressions %>% stl(s.window = "period") %>% autoplot
grid.arrange(LAffiliate_Impressions.plot1, LAffiliate_Impressions.plot2, ncol = 2)
There doesn’t seem to be a significant seasonal component. However, there is a clear trend component. Therefore, we may need to apply first order differencing, which we can check with stationarity and seasonal stationarity tests.
# Stationarity tests
adf.test(LAffiliate_Impressions) # Augmented Dickey-Fuller Test
##
## Augmented Dickey-Fuller Test
##
## data: LAffiliate_Impressions
## Dickey-Fuller = -2.8295, Lag order = 4, p-value = 0.2324
## alternative hypothesis: stationary
pp.test(LAffiliate_Impressions) # Phillips-Perron Unit Root Test
##
## Phillips-Perron Unit Root Test
##
## data: LAffiliate_Impressions
## Dickey-Fuller Z(alpha) = -50.75, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary
kpss.test(LAffiliate_Impressions) # KPSS Test for Level Stationarity
##
## KPSS Test for Level Stationarity
##
## data: LAffiliate_Impressions
## KPSS Level = 0.49578, Truncation lag parameter = 4, p-value = 0.04262
# Seasonal stationarity
nsdiffs(LAffiliate_Impressions)
## [1] 0
Although according to the PP test we should reject the null hypothesis that the data is not stationary, because the ADF and KPSS tests suggest otherwise and we identified a clear negative trend in the data, we apply first-order differencing. We do not need to apply seasonal differencing.
# Apply first-order differencing
LAffiliate_Impressions.diff <- diff(LAffiliate_Impressions, differences=1)
# Check data is differenced
ndiffs(LAffiliate_Impressions.diff)
## [1] 0
nsdiffs(LAffiliate_Impressions.diff)
## [1] 0
Due to the output of 0, we know the data is now stationary.
Summary of the variables used
To summarise, we applied first-order differencing to all variables apart from LEmail_Impressions. The differenced log-transformed variables should now be interpreted as approximate growth rates rather than levels.
# Build the dataset for the VAR model. cbind() aligns the series on their time
# index; the differenced series are shorter (first-order differencing drops 1
# observation, the seasonal difference of LSales drops 52 more), so the
# unmatched leading rows become NA and are removed by na.omit() below.
# (The original window() wrapper took no arguments and was a no-op.)
data.ts <- cbind(LPaid_Views.diff, LOrganic_Views.diff, LGoogle_Impressions.diff,
                 LEmail_Impressions, LFacebook_Impressions.diff,
                 LAffiliate_Impressions.diff, LSales.diff)
data.ts <- na.omit(data.ts) # get rid of NA values
With the above variables, we construct the VAR model.
# Conduct the model and store the results.
# NOTE(review): with lag.max = 1 only a single lag can be chosen, so the
# ic = "AIC" selection criterion has no effect — confirm a larger lag.max
# was not intended.
var_model <- vars::VAR(data.ts, ic = "AIC", lag.max=1, type="const")
lmp <- var_model$varresult # per-equation lm fits, one per endogenous variable
stargazer(lmp, type = "text", dep.var.labels.include = FALSE)
##
## =============================================================================================
## Dependent variable:
## --------------------------------------------------------------
## (1) (2) (3) (4) (5) (6) (7)
## ---------------------------------------------------------------------------------------------
## LPaid_Views.diff.l1 -0.078 0.093 -0.041 -0.026 0.096 -0.015 -0.0002
## (0.143) (0.060) (0.071) (0.064) (0.139) (0.038) (0.066)
##
## LOrganic_Views.diff.l1 -0.043 -0.007 -0.056 -0.094 -0.067 -0.044 -0.075
## (0.334) (0.139) (0.165) (0.151) (0.326) (0.088) (0.154)
##
## LGoogle_Impressions.diff.l1 0.103 0.087 0.054 0.131 -0.004 0.034 0.001
## (0.265) (0.110) (0.130) (0.119) (0.258) (0.070) (0.122)
##
## LEmail_Impressions.l1 0.217 -0.034 -0.019 0.390*** 0.571* -0.063 -0.036
## (0.292) (0.122) (0.144) (0.132) (0.285) (0.077) (0.135)
##
## LFacebook_Impressions.diff.l1 -0.086 0.013 -0.044 0.036 -0.058 0.058 0.079
## (0.149) (0.062) (0.074) (0.067) (0.146) (0.039) (0.069)
##
## LAffiliate_Impressions.diff.l1 0.333 0.070 0.103 0.191 0.316 -0.482*** -0.127
## (0.497) (0.208) (0.245) (0.224) (0.485) (0.131) (0.230)
##
## LSales.diff.l1 0.083 0.005 -0.446*** -0.144 0.049 0.060 -0.429***
## (0.282) (0.118) (0.139) (0.127) (0.275) (0.074) (0.130)
##
## const -2.758 0.467 0.253 7.795*** -7.273* 0.800 0.461
## (3.739) (1.560) (1.844) (1.686) (3.644) (0.982) (1.728)
##
## ---------------------------------------------------------------------------------------------
## Observations 59 59 59 59 59 59 59
## R2 0.040 0.064 0.184 0.183 0.085 0.245 0.220
## Adjusted R2 -0.091 -0.064 0.071 0.071 -0.041 0.142 0.113
## Residual Std. Error (df = 51) 0.630 0.263 0.310 0.284 0.613 0.165 0.291
## F Statistic (df = 7; 51) 0.306 0.502 1.637 1.637 0.677 2.370** 2.054*
## =============================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Carryover effects:
Marketing spending creates both positive and negative carryover effects, with -0.078 units for paid views, -0.007 for organic views, 0.054 for Google impressions, 0.39% for email impressions, -0.058 for facebook impressions, and -0.482 for affiliate impressions, where only email and affiliate impressions are significant at the 1% level.
Cross-over effects:
Paid views has a positive effect on organic views, however it is negligible where a unit increase in paid views increases organic views by 0.093 units. Its effect on facebook impressions is also positive where a unit increase in paid views increases facebook impressions by 0.096 units. It has a negative effect on Google impressions, where a unit increase decreases Google impressions by 0.041. Its effect on email and affiliate impressions are all also negative, where a unit increase in paid views leads to a decrease of 0.026% and 0.015 respectively.
Organic views has a negative effect on all channels, where a one unit increase in organic views decreases paid views by 0.043 units, Google impressions by 0.056, email impressions by 0.094%, facebook impressions by 0.067, and affiliate impressions by 0.044 units.
Google impressions only has a negative effect on facebook impressions, where a one unit increase in Google impressions decreases facebook impressions by 0.004 units. It has however a positive effect on all other marketing spending where a unit increase in Google impressions increases paid views, organic views, email impressions, and affiliate impressions by 0.103 units, 0.087, 0.131%, and 0.034 units respectively.
Email impressions has a negative effect on organic views, Google impressions, and affiliate impressions, where a 1% increase in email impressions decreases organic views, Google impressions, and affiliate impressions by 0.034, 0.019, and 0.063 units respectively. A 1% increase in email impressions increases paid views and facebook impressions by 0.217 and 0.571.
A unit increase in facebook impressions increases organic views by 0.013 units, email impressions by 0.036%, and affiliate impressions by 0.058 units. It decreases paid views and Google impressions by 0.086 and 0.044 units respectively.
Finally, affiliate impressions positively affects all other channels. A unit increase in affiliate impressions increases paid views by 0.333 units, organic views by 0.070 units, Google impressions by 0.103 units, email impressions by 0.191%, and facebook impressions by 0.316 units.
We can note that only one of these crossover effects is significant. Indeed, only the effect of email impressions on facebook impressions is significant, and only at the 10% level. All other crossover effects are negligible.
Feedback Effects:
Sales have both negative and positive feedback effects on the marketing spending. A unit increase in past sales growth impacts paid views, organic views, facebook impressions positively, and affiliate impressions in the following period by 0.083 units, 0.005 units, 0.049 units, and 0.060 units, while it negatively impacts Google and email impressions by 0.446 units and 0.144%. The effect of sales on Google impressions is significant at the 1% level.
Direct Impact:
Google impressions and facebook impressions have a positive impact on sales with a unit increase in Google impressions increasing sales by 0.001 units and a unit increase in facebook impressions increasing sales by 0.079. Paid views, organic views, email impressions, and affiliate impressions have a negative impact on sales where a unit increase in paid views decreases sales growth by 0.0002 units, a unit increase in organic views decreases sales growth by 0.075 units, a 1% increase in email impressions decreases sales growth by 0.036%, and a unit increase in affiliate impression decreases sales by 0.127 units. Here, all effects are negligible.
# Check the residuals of the Sales equation.
# Extract by the full equation name "LSales.diff" (the variable the VAR was
# fitted on); the original data.frame(...)$LSales only worked through $'s
# partial name matching, which is fragile.
sales.residuals <- residuals(var_model)[, "LSales.diff"]
sales.residuals <- ts(sales.residuals, frequency = 52)
round(mean(sales.residuals),4)
## [1] 0
autoplot(sales.residuals)
The residual analysis is satisfactory, with a mean of 0. The model can therefore be accepted.
Analyse the IRF plots:
# Orthogonalised impulse responses of differenced log-sales to a shock in each
# marketing channel, 7 periods ahead, with 95% bootstrap bands (runs = 100)
irfs <- irf(var_model, impulse = c('LPaid_Views.diff', 'LOrganic_Views.diff', 'LGoogle_Impressions.diff', 'LEmail_Impressions', 'LFacebook_Impressions.diff', 'LAffiliate_Impressions.diff'), response = 'LSales.diff', runs = 100, n.ahead = 7 , ortho = TRUE, ci=0.95)
plot(irfs)
We analyse below the impact or shock of the impulse series (independent variables) on the response series (dependent variable) and how it progresses over time.
LPaid_Views.diff: an increase in paid views causes an immediate drop in sales, with a delayed increase in revenues in period 2 (end of dust settling, or short-term effects), an effect which tends towards 0 in the long-run.
LOrganic_Views.diff: an increase in organic views causes a drop in revenues, effect which quickly tends to 0.
LGoogle_Impressions.diff: an increase in Google impressions does not cause a response, with an effect hovering around 0.
LEmail_Impressions: an increase in email impressions causes an immediate increase in revenues which dramatically drops the following period and hovers around 0 over time.
LFacebook_Impressions.diff: an increase in facebook impressions causes a delayed boost in revenues in period 2, which drops the following period and tends to 0 over time.
LAffiliate_Impressions.diff: an increase in affiliate impressions creates a negligible effect.
Granger Causality:
We can perform the Granger causality tests to confirm whether there is a causal relationship between the variables.
# Perform Granger causality tests
## Does Paid Views Granger-Cause Y?
# H0: lagged LPaid_Views.diff does not help predict LSales.diff (1 lag).
grangertest(data.ts[, c("LPaid_Views.diff", "LSales.diff")], order = 1)
## Granger causality test
##
## Model 1: LSales.diff ~ Lags(LSales.diff, 1:1) + Lags(LPaid_Views.diff, 1:1)
## Model 2: LSales.diff ~ Lags(LSales.diff, 1:1)
## Res.Df Df F Pr(>F)
## 1 56
## 2 57 -1 0.0427 0.837
# Reverse direction — H0: lagged LSales.diff does not help predict LPaid_Views.diff.
grangertest(data.ts[, c("LSales.diff", "LPaid_Views.diff")], order = 1)
## Granger causality test
##
## Model 1: LPaid_Views.diff ~ Lags(LPaid_Views.diff, 1:1) + Lags(LSales.diff, 1:1)
## Model 2: LPaid_Views.diff ~ Lags(LPaid_Views.diff, 1:1)
## Res.Df Df F Pr(>F)
## 1 56
## 2 57 -1 0.2625 0.6104
## Organic Views
# H0: lagged LOrganic_Views.diff does not help predict LSales.diff (1 lag).
grangertest(data.ts[, c("LOrganic_Views.diff", "LSales.diff")], order = 1)
## Granger causality test
##
## Model 1: LSales.diff ~ Lags(LSales.diff, 1:1) + Lags(LOrganic_Views.diff, 1:1)
## Model 2: LSales.diff ~ Lags(LSales.diff, 1:1)
## Res.Df Df F Pr(>F)
## 1 56
## 2 57 -1 0.1561 0.6942
# Reverse direction — H0: lagged LSales.diff does not help predict LOrganic_Views.diff.
grangertest(data.ts[, c("LSales.diff", "LOrganic_Views.diff")], order = 1)
## Granger causality test
##
## Model 1: LOrganic_Views.diff ~ Lags(LOrganic_Views.diff, 1:1) + Lags(LSales.diff, 1:1)
## Model 2: LOrganic_Views.diff ~ Lags(LOrganic_Views.diff, 1:1)
## Res.Df Df F Pr(>F)
## 1 56
## 2 57 -1 0.0248 0.8756
## Google Impressions
# H0: lagged LGoogle_Impressions.diff does not help predict LSales.diff (1 lag).
grangertest(data.ts[, c("LGoogle_Impressions.diff", "LSales.diff")], order = 1)
## Granger causality test
##
## Model 1: LSales.diff ~ Lags(LSales.diff, 1:1) + Lags(LGoogle_Impressions.diff, 1:1)
## Model 2: LSales.diff ~ Lags(LSales.diff, 1:1)
## Res.Df Df F Pr(>F)
## 1 56
## 2 57 -1 0.0067 0.9351
# Reverse direction — note the significant result below (p = 0.0016): past
# sales Granger-cause Google impressions, suggesting Google spend reacts to
# sales performance rather than the other way around.
grangertest(data.ts[, c("LSales.diff", "LGoogle_Impressions.diff")], order = 1)
## Granger causality test
##
## Model 1: LGoogle_Impressions.diff ~ Lags(LGoogle_Impressions.diff, 1:1) + Lags(LSales.diff, 1:1)
## Model 2: LGoogle_Impressions.diff ~ Lags(LGoogle_Impressions.diff, 1:1)
## Res.Df Df F Pr(>F)
## 1 56
## 2 57 -1 11.061 0.001561 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Email Impressions
# NOTE(review): email enters in log level (LEmail_Impressions), not in first
# differences like the other channels — presumably the level series was
# already stationary; confirm against the stationarity tests earlier in the file.
# H0: lagged LEmail_Impressions does not help predict LSales.diff (1 lag).
grangertest(data.ts[, c("LEmail_Impressions", "LSales.diff")], order = 1)
## Granger causality test
##
## Model 1: LSales.diff ~ Lags(LSales.diff, 1:1) + Lags(LEmail_Impressions, 1:1)
## Model 2: LSales.diff ~ Lags(LSales.diff, 1:1)
## Res.Df Df F Pr(>F)
## 1 56
## 2 57 -1 0.0415 0.8394
# Reverse direction — H0: lagged LSales.diff does not help predict LEmail_Impressions.
grangertest(data.ts[, c("LSales.diff", "LEmail_Impressions")], order = 1)
## Granger causality test
##
## Model 1: LEmail_Impressions ~ Lags(LEmail_Impressions, 1:1) + Lags(LSales.diff, 1:1)
## Model 2: LEmail_Impressions ~ Lags(LEmail_Impressions, 1:1)
## Res.Df Df F Pr(>F)
## 1 56
## 2 57 -1 1.1474 0.2887
## Facebook Impressions
# H0: lagged LFacebook_Impressions.diff does not help predict LSales.diff (1 lag).
grangertest(data.ts[, c("LFacebook_Impressions.diff", "LSales.diff")], order = 1)
## Granger causality test
##
## Model 1: LSales.diff ~ Lags(LSales.diff, 1:1) + Lags(LFacebook_Impressions.diff, 1:1)
## Model 2: LSales.diff ~ Lags(LSales.diff, 1:1)
## Res.Df Df F Pr(>F)
## 1 56
## 2 57 -1 1.0606 0.3075
# Reverse direction — H0: lagged LSales.diff does not help predict LFacebook_Impressions.diff.
grangertest(data.ts[, c("LSales.diff", "LFacebook_Impressions.diff")], order = 1)
## Granger causality test
##
## Model 1: LFacebook_Impressions.diff ~ Lags(LFacebook_Impressions.diff, 1:1) + Lags(LSales.diff, 1:1)
## Model 2: LFacebook_Impressions.diff ~ Lags(LFacebook_Impressions.diff, 1:1)
## Res.Df Df F Pr(>F)
## 1 56
## 2 57 -1 0.1876 0.6666
## Affiliate Impressions
# H0: lagged LAffiliate_Impressions.diff does not help predict LSales.diff (1 lag).
grangertest(data.ts[, c("LAffiliate_Impressions.diff", "LSales.diff")], order = 1)
## Granger causality test
##
## Model 1: LSales.diff ~ Lags(LSales.diff, 1:1) + Lags(LAffiliate_Impressions.diff, 1:1)
## Model 2: LSales.diff ~ Lags(LSales.diff, 1:1)
## Res.Df Df F Pr(>F)
## 1 56
## 2 57 -1 0.0875 0.7685
# Reverse direction — H0: lagged LSales.diff does not help predict LAffiliate_Impressions.diff.
grangertest(data.ts[, c("LSales.diff", "LAffiliate_Impressions.diff")], order = 1)
## Granger causality test
##
## Model 1: LAffiliate_Impressions.diff ~ Lags(LAffiliate_Impressions.diff, 1:1) + Lags(LSales.diff, 1:1)
## Model 2: LAffiliate_Impressions.diff ~ Lags(LAffiliate_Impressions.diff, 1:1)
## Res.Df Df F Pr(>F)
## 1 56
## 2 57 -1 0.5169 0.4751
The Granger causality tests evaluate whether the lagged values of the independent variable significantly predicts the dependent variable, LSales.diff. We can interpret the outcomes as follows:
LPaid_Views.diff: With a p-value of 0.837 > 0.05, we cannot reject the null hypothesis that LPaid_Views.diff does not Granger-cause LSales.diff and thus past values of LPaid_Views.diff are not significantly useful in predicting future values of LSales.diff.
LOrganic_Views.diff: The p-value is 0.6942, greater than the significance level of 0.05. Therefore, LOrganic_Views.diff does not Granger-cause LSales.diff.
LGoogle_Impressions.diff: The p-value is 0.9351 > 0.05, therefore, we fail to reject the null hypothesis that LGoogle_Impressions.diff does not Granger-cause LSales.diff. Past values of LGoogle_Impressions.diff are thus not significantly useful in predicting future values of LSales.diff.
LEmail_Impressions: With a p-value of 0.8394 > 0.05, LEmail_Impressions does not Granger-cause LSales.diff.
LFacebook_Impressions.diff: The p-value is 0.3075, more than the significance level of 0.05. Therefore, LFacebook_Impressions.diff does not Granger-cause LSales.diff.
LAffiliate_Impressions.diff: With a p-value of 0.7685, greater than 0.05, LAffiliate_Impressions.diff does not Granger-cause LSales.diff.
Sales Granger-cause Google impressions (p = 0.0016), suggesting Google spending reacts to past sales, but sales do not Granger-cause any of the other marketing channels.
Evaluate the Intermediate and long-term effects:
# Make a table to summarize IRF coefficients and their confidence intervals for each potential marketing spending
## Paid Views
# Point estimate and 95% bootstrap CI bounds of the sales response per
# period, rounded to 4 decimal places.
Paid_Views.irf_table <- round(data.frame(period = seq(1, 8),
response.Paid_Views = irfs$irf$LPaid_Views.diff,
Paid_Views.lower = irfs$Lower$LPaid_Views.diff,
Paid_Views.upper = irfs$Upper$LPaid_Views.diff),4)
colnames(Paid_Views.irf_table) <- c('Period', 'LPaid_Views.diff', 'LPaid_Views.diff Lower', 'LPaid_Views.diff Upper')
Paid_Views.irf_table
## Period LPaid_Views.diff LPaid_Views.diff Lower LPaid_Views.diff Upper
## 1 1 -0.0246 -0.0896 0.0576
## 2 2 0.0167 -0.0688 0.0810
## 3 3 -0.0074 -0.0516 0.0439
## 4 4 0.0013 -0.0249 0.0274
## 5 5 -0.0006 -0.0164 0.0123
## 6 6 -0.0003 -0.0093 0.0083
## 7 7 0.0002 -0.0047 0.0050
## 8 8 -0.0002 -0.0033 0.0027
## Organic Views
# Point estimate and 95% bootstrap CI bounds of the sales response per period.
Organic_Views.irf_table <- round(data.frame(period = seq(1, 8),
response.Organic_Views = irfs$irf$LOrganic_Views.diff,
Organic_Views.lower = irfs$Lower$LOrganic_Views.diff,
Organic_Views.upper = irfs$Upper$LOrganic_Views.diff),4)
colnames(Organic_Views.irf_table) <- c('Period', 'LOrganic_Views.diff', 'LOrganic_Views.diff Lower', 'LOrganic_Views.diff Upper')
Organic_Views.irf_table
## Period LOrganic_Views.diff LOrganic_Views.diff Lower
## 1 1 -0.0543 -0.1158
## 2 2 0.0093 -0.0678
## 3 3 -0.0025 -0.0653
## 4 4 -0.0010 -0.0270
## 5 5 0.0013 -0.0156
## 6 6 -0.0011 -0.0103
## 7 7 0.0007 -0.0050
## 8 8 -0.0004 -0.0033
## LOrganic_Views.diff Upper
## 1 0.0053
## 2 0.0841
## 3 0.0295
## 4 0.0288
## 5 0.0156
## 6 0.0087
## 7 0.0046
## 8 0.0029
## Google Impressions
# Point estimate and 95% bootstrap CI bounds of the sales response per period.
Google_Impressions.irf_table <- round(data.frame(period = seq(1, 8),
response.Google_Impressions = irfs$irf$LGoogle_Impressions.diff,
Google_Impressions.lower = irfs$Lower$LGoogle_Impressions.diff,
Google_Impressions.upper = irfs$Upper$LGoogle_Impressions.diff),4)
colnames(Google_Impressions.irf_table) <- c('Period', 'LGoogle_Impressions.diff', 'LGoogle_Impressions.diff Lower', 'LGoogle_Impressions.diff Upper')
Google_Impressions.irf_table
## Period LGoogle_Impressions.diff LGoogle_Impressions.diff Lower
## 1 1 -0.0043 -0.0599
## 2 2 0.0072 -0.0503
## 3 3 -0.0089 -0.0486
## 4 4 0.0062 -0.0092
## 5 5 -0.0030 -0.0177
## 6 6 0.0017 -0.0036
## 7 7 -0.0007 -0.0048
## 8 8 0.0004 -0.0021
## LGoogle_Impressions.diff Upper
## 1 0.0511
## 2 0.0697
## 3 0.0210
## 4 0.0346
## 5 0.0059
## 6 0.0100
## 7 0.0031
## 8 0.0025
## Email Impressions
# Point estimate and 95% bootstrap CI bounds of the sales response per period.
# (Email is the log-level series, not a difference — see note at the Granger tests.)
Email_Impressions.irf_table <- round(data.frame(period = seq(1, 8),
response.Email_Impressions = irfs$irf$LEmail_Impressions,
Email_Impressions.lower = irfs$Lower$LEmail_Impressions,
Email_Impressions.upper = irfs$Upper$LEmail_Impressions),4)
colnames(Email_Impressions.irf_table) <- c('Period', 'LEmail_Impressions', 'LEmail_Impressions Lower', 'LEmail_Impressions Upper')
Email_Impressions.irf_table
## Period LEmail_Impressions LEmail_Impressions Lower LEmail_Impressions Upper
## 1 1 0.0541 -0.0151 0.1386
## 2 2 -0.0360 -0.1041 0.0455
## 3 3 0.0260 -0.0053 0.0678
## 4 4 -0.0093 -0.0355 0.0165
## 5 5 0.0052 -0.0041 0.0239
## 6 6 -0.0016 -0.0117 0.0057
## 7 7 0.0007 -0.0027 0.0068
## 8 8 -0.0001 -0.0038 0.0023
## Facebook Impressions
# Point estimate and 95% bootstrap CI bounds of the sales response per period.
Facebook_Impressions.irf_table <- round(data.frame(period = seq(1, 8),
response.Facebook_Impressions = irfs$irf$LFacebook_Impressions.diff,
Facebook_Impressions.lower = irfs$Lower$LFacebook_Impressions.diff,
Facebook_Impressions.upper = irfs$Upper$LFacebook_Impressions.diff),4)
colnames(Facebook_Impressions.irf_table) <- c('Period', 'LFacebook_Impressions.diff', 'LFacebook_Impressions.diff Lower', 'LFacebook_Impressions.diff Upper')
Facebook_Impressions.irf_table
## Period LFacebook_Impressions.diff LFacebook_Impressions.diff Lower
## 1 1 -0.0109 -0.0778
## 2 2 0.0440 -0.0287
## 3 3 -0.0235 -0.0619
## 4 4 0.0125 -0.0081
## 5 5 -0.0055 -0.0205
## 6 6 0.0024 -0.0028
## 7 7 -0.0009 -0.0069
## 8 8 0.0003 -0.0016
## LFacebook_Impressions.diff Upper
## 1 0.0658
## 2 0.1114
## 3 0.0071
## 4 0.0362
## 5 0.0057
## 6 0.0132
## 7 0.0025
## 8 0.0046
## Affiliate Impressions
# Point estimate and 95% bootstrap CI bounds of the sales response per period.
Affiliate_Impressions.irf_table <- round(data.frame(period = seq(1, 8),
response.Affiliate_Impressions = irfs$irf$LAffiliate_Impressions.diff,
Affiliate_Impressions.lower = irfs$Lower$LAffiliate_Impressions.diff,
Affiliate_Impressions.upper = irfs$Upper$LAffiliate_Impressions.diff),4)
colnames(Affiliate_Impressions.irf_table) <- c('Period', 'LAffiliate_Impressions.diff', 'LAffiliate_Impressions.diff Lower', 'LAffiliate_Impressions.diff Upper')
Affiliate_Impressions.irf_table
## Period LAffiliate_Impressions.diff LAffiliate_Impressions.diff Lower
## 1 1 -0.0166 -0.0760
## 2 2 -0.0115 -0.0733
## 3 3 0.0158 -0.0346
## 4 4 -0.0117 -0.0431
## 5 5 0.0077 -0.0165
## 6 6 -0.0044 -0.0159
## 7 7 0.0024 -0.0061
## 8 8 -0.0012 -0.0055
## LAffiliate_Impressions.diff Upper
## 1 0.0430
## 2 0.0516
## 3 0.0595
## 4 0.0233
## 5 0.0264
## 6 0.0101
## 7 0.0091
## 8 0.0036
We can then apply the t>1 criterion to determine coefficient significance and calculate long-term elasticities of the different advertising spending.
# Paid Views
# For each IRF horizon, approximate the standard error from the width of the
# 95% bootstrap CI (upper - lower = 2 * 1.96 * se, assuming a symmetric CI),
# then keep the coefficient only when its t-ratio exceeds 1; all other
# periods are zeroed. Note this one-sided rule retains positive effects only.
Paid_Views.irf_results <- matrix(nrow = 8, ncol = 1)
for (i in 1:8) {
se <- (irfs$Upper$LPaid_Views.diff[i]-irfs$Lower$LPaid_Views.diff[i])/(2*1.96)
Paid_Views.irf_t <- irfs$irf$LPaid_Views.diff[i]/se
if (Paid_Views.irf_t>1) {
Paid_Views.irf_results[i] <- irfs$irf$LPaid_Views.diff[i]
} else {
Paid_Views.irf_results[i] <-0
}
}
Paid_Views.irf_results
## [,1]
## [1,] 0
## [2,] 0
## [3,] 0
## [4,] 0
## [5,] 0
## [6,] 0
## [7,] 0
## [8,] 0
# Long-run elasticity = sum of the retained (significant) period effects.
lr_paid_views <- sum(Paid_Views.irf_results)
lr_paid_views
## [1] 0
Once the t>1 rule is applied, we find that paid views advertising has no significant and positive impact in any of the eight periods studied. A 1% increase in paid views advertising spending will increase the firm’s sales by 0% in the long run.
# Organic Views
# Same t>1 screening as for paid views: se is backed out of the 95% CI width,
# and only periods with a positive t-ratio above 1 keep their coefficient.
Organic_Views.irf_results <- matrix(nrow = 8, ncol = 1)
for (i in 1:8) {
se <- (irfs$Upper$LOrganic_Views.diff[i]-irfs$Lower$LOrganic_Views.diff[i])/(2*1.96)
Organic_Views.irf_t <- irfs$irf$LOrganic_Views.diff[i]/se
if (Organic_Views.irf_t>1) {
Organic_Views.irf_results[i] <- irfs$irf$LOrganic_Views.diff[i]
} else {
Organic_Views.irf_results[i] <-0
}
}
Organic_Views.irf_results
## [,1]
## [1,] 0
## [2,] 0
## [3,] 0
## [4,] 0
## [5,] 0
## [6,] 0
## [7,] 0
## [8,] 0
# Long-run elasticity = sum of the retained (significant) period effects.
lr_organic_views <- sum(Organic_Views.irf_results)
lr_organic_views
## [1] 0
Similarly to paid views advertising, the organic views advertising has no significant and positive impact. A 1% increase in organic views advertising spending will increase the firm’s sales by 0% in the long run.
# Google Impressions
# Same t>1 screening: se is backed out of the 95% CI width, and only periods
# with a positive t-ratio above 1 keep their coefficient.
Google_Impressions.irf_results <- matrix(nrow = 8, ncol = 1)
for (i in 1:8) {
se <- (irfs$Upper$LGoogle_Impressions.diff[i]-irfs$Lower$LGoogle_Impressions.diff[i])/(2*1.96)
Google_Impressions.irf_t <- irfs$irf$LGoogle_Impressions.diff[i]/se
if (Google_Impressions.irf_t>1) {
Google_Impressions.irf_results[i] <- irfs$irf$LGoogle_Impressions.diff[i]
} else {
Google_Impressions.irf_results[i] <-0
}
}
Google_Impressions.irf_results
## [,1]
## [1,] 0
## [2,] 0
## [3,] 0
## [4,] 0
## [5,] 0
## [6,] 0
## [7,] 0
## [8,] 0
# Long-run elasticity = sum of the retained (significant) period effects.
lr_google_impressions <- sum(Google_Impressions.irf_results)
lr_google_impressions
## [1] 0
The Google impressions advertising has no significant and positive impact with a 1% increase in Google impressions advertising spending increasing the firm’s sales by 0% in the long run.
# Email Impressions
# Same t>1 screening; note the email IRF uses the level series name
# (LEmail_Impressions, no .diff suffix) throughout.
Email_Impressions.irf_results <- matrix(nrow = 8, ncol = 1)
for (i in 1:8) {
se <- (irfs$Upper$LEmail_Impressions[i]-irfs$Lower$LEmail_Impressions[i])/(2*1.96)
Email_Impressions.irf_t <- irfs$irf$LEmail_Impressions[i]/se
if (Email_Impressions.irf_t>1) {
Email_Impressions.irf_results[i] <- irfs$irf$LEmail_Impressions[i]
} else {
Email_Impressions.irf_results[i] <-0
}
}
Email_Impressions.irf_results
## [,1]
## [1,] 0.05409493
## [2,] 0.00000000
## [3,] 0.02602730
## [4,] 0.00000000
## [5,] 0.00000000
## [6,] 0.00000000
## [7,] 0.00000000
## [8,] 0.00000000
# Long-run elasticity = sum of the retained period effects (periods 1 and 3).
lr_email_impressions <- sum(Email_Impressions.irf_results)
lr_email_impressions
## [1] 0.08012222
The email impressions advertising has a significant and positive impact in the first and third periods. A 1% increase in email impressions advertising spending will increase the firm’s sales by 0.08% in the long run.
# Facebook Impressions
# Same t>1 screening: se is backed out of the 95% CI width, and only periods
# with a positive t-ratio above 1 keep their coefficient.
Facebook_Impressions.irf_results <- matrix(nrow = 8, ncol = 1)
for (i in 1:8) {
se <- (irfs$Upper$LFacebook_Impressions.diff[i]-irfs$Lower$LFacebook_Impressions.diff[i])/(2*1.96)
Facebook_Impressions.irf_t <- irfs$irf$LFacebook_Impressions.diff[i]/se
if (Facebook_Impressions.irf_t>1) {
Facebook_Impressions.irf_results[i] <- irfs$irf$LFacebook_Impressions.diff[i]
} else {
Facebook_Impressions.irf_results[i] <-0
}
}
Facebook_Impressions.irf_results
## [,1]
## [1,] 0.00000000
## [2,] 0.04401091
## [3,] 0.00000000
## [4,] 0.01250019
## [5,] 0.00000000
## [6,] 0.00000000
## [7,] 0.00000000
## [8,] 0.00000000
# Long-run elasticity = sum of the retained period effects (periods 2 and 4).
lr_facebook_impressions <- sum(Facebook_Impressions.irf_results)
lr_facebook_impressions
## [1] 0.0565111
The Facebook impressions advertising has a significant and positive impact in periods 2 and 4. A 1% increase in Facebook impressions advertising spending will increase the firm’s sales by 0.057% in the long run.
# Affiliate Impressions
# Same t>1 screening: se is backed out of the 95% CI width, and only periods
# with a positive t-ratio above 1 keep their coefficient.
Affiliate_Impressions.irf_results <- matrix(nrow = 8, ncol = 1)
for (i in 1:8) {
se <- (irfs$Upper$LAffiliate_Impressions.diff[i]-irfs$Lower$LAffiliate_Impressions.diff[i])/(2*1.96)
Affiliate_Impressions.irf_t <- irfs$irf$LAffiliate_Impressions.diff[i]/se
if (Affiliate_Impressions.irf_t>1) {
Affiliate_Impressions.irf_results[i] <- irfs$irf$LAffiliate_Impressions.diff[i]
} else {
Affiliate_Impressions.irf_results[i] <-0
}
}
Affiliate_Impressions.irf_results
## [,1]
## [1,] 0
## [2,] 0
## [3,] 0
## [4,] 0
## [5,] 0
## [6,] 0
## [7,] 0
## [8,] 0
# Long-run elasticity = sum of the retained (significant) period effects.
lr_affiliate_impressions <- sum(Affiliate_Impressions.irf_results)
lr_affiliate_impressions
## [1] 0
The affiliate impressions advertising has no significant and positive impact. A 1% increase in affiliate impressions advertising spending thus will increase the firm’s sales by 0% in the long run.
# Current budget allocation ----
# Total activity per channel over the whole sample, used as a proxy for the
# current spend on each channel.
cost_paid_views <- sum(data$Paid_Views)
cost_organic_views <- sum(data$Organic_Views)
cost_google_impressions <- sum(data$Google_Impressions)
cost_email_impressions <- sum(data$Email_Impressions)
cost_facebook_impressions <- sum(data$Facebook_Impressions)
cost_affiliate_impressions <- sum(data$Affiliate_Impressions)
# BUG FIX: the original total added cost_paid_views twice and omitted
# cost_organic_views, so cost_total was wrong and the shares below did not
# sum to 100%.
cost_total <- cost_paid_views + cost_organic_views + cost_google_impressions +
  cost_email_impressions + cost_facebook_impressions + cost_affiliate_impressions
# Share of current budget allocation
costshare_paid_views <- cost_paid_views / cost_total
costshare_organic_views <- cost_organic_views / cost_total
costshare_google_impressions <- cost_google_impressions / cost_total
costshare_email_impressions <- cost_email_impressions / cost_total
costshare_facebook_impressions <- cost_facebook_impressions / cost_total
costshare_affiliate_impressions <- cost_affiliate_impressions / cost_total
# Pie Chart
actual_shares <- c(costshare_paid_views, costshare_organic_views,
                   costshare_google_impressions, costshare_email_impressions,
                   costshare_facebook_impressions, costshare_affiliate_impressions)
labels <- c("Paid_Views", "Organic_Views", "Google_Impressions",
            "Email_Impressions", "Facebook_Impressions", "Affiliate_Impressions")
# Label each slice as "<channel> <percent>%".
actual_percentages <- round(actual_shares * 100)
labels <- paste0(labels, " ", actual_percentages, "%")
# Get the pie-chart
pie(actual_shares, labels = labels, main = "Current Budget Allocation")
We now figure out the optimal budget allocation.
# Long-run elasticities taken from the IRF significance analysis above.
beta_paid_views <- lr_paid_views
beta_organic_views <- lr_organic_views
beta_google_impressions <- lr_google_impressions
beta_email_impressions <- lr_email_impressions
beta_facebook_impressions <- lr_facebook_impressions
beta_affiliate_impressions <- lr_affiliate_impressions
# Total of all long-run elasticities across the six channels.
beta_all <- sum(beta_paid_views, beta_organic_views, beta_google_impressions,
                beta_email_impressions, beta_facebook_impressions,
                beta_affiliate_impressions)
# Optimal budget share per channel: its elasticity's weight in the total,
# so channels with zero long-run elasticity receive a zero share.
optim_paid_views <- beta_paid_views / beta_all
optim_organic_views <- beta_organic_views / beta_all
optim_google_impressions <- beta_google_impressions / beta_all
optim_email_impressions <- beta_email_impressions / beta_all
optim_facebook_impressions <- beta_facebook_impressions / beta_all
optim_affiliate_impressions <- beta_affiliate_impressions / beta_all
This optimal budget allocation can be summarised in a new pie chart.
# Optimal spending
# Collect the optimal shares in channel order; only email (58.6%) and
# Facebook (41.4%) receive a non-zero allocation, per the printout below.
optimal_spend <- c(optim_paid_views, optim_organic_views, optim_google_impressions, optim_email_impressions, optim_facebook_impressions, optim_affiliate_impressions)
optimal_spend = round(optimal_spend, digits=5)
optimal_spend
## [1] 0.0000 0.0000 0.0000 0.5864 0.4136 0.0000
optimal_shares <- c(optim_paid_views, optim_organic_views, optim_google_impressions, optim_email_impressions, optim_facebook_impressions, optim_affiliate_impressions)
optimal_labels<-c("Paid View", "Organic Views", "Google Impressions", "Email Impressions", "Facebook Impressions", "Affiliate Impressions")
# Label each slice as "<channel> <percent>%".
optimal_percentages <- round(optimal_shares*100)
optimal_labels <- paste(optimal_labels, optimal_percentages)
optimal_labels <- paste(optimal_labels, "%", sep="")
# Get the pie-chart
pie(optimal_shares, labels=optimal_labels, main="Optimal Budget Allocation" )
Based on the long-run elasticities, the firm should concentrate its marketing spending on email and Facebook impressions only.